#!/usr/bin/python
# -*- coding: utf-8 -*-

import re
import socket
import urllib

# Default timeout (seconds) for every socket created after this point, so a
# stalled HTTP request raises instead of blocking the crawl forever.
socket.setdefaulttimeout(30)

# Input: one numeric page id per line.  Output: one fetched page per line.
ID_LIST_PATH = '/home/chaojiansong/videobook/notinsvn/recrawl_list'
OUTPUT_PATH = '/home/chaojiansong/videobook/notinsvn/pages.recrawl'
PAGE_URL = 'http://baike.baidu.com/view/%d.htm'

# Re-crawl every page whose id is listed in ID_LIST_PATH and append the page
# body to OUTPUT_PATH as a single line.  Failures for one id are reported on
# stdout and the crawl moves on to the next id.
#
# The `with` blocks guarantee both files are flushed and closed even if the
# loop is interrupted (e.g. Ctrl-C) or an unexpected error escapes.
# print() is called with a single pre-formatted argument so the output is the
# same as the equivalent print statement.
with open(ID_LIST_PATH, 'r') as ids:
    with open(OUTPUT_PATH, 'w') as out:
        for line in ids:
            line = line.strip()
            if not line:
                # Blank lines (typically a trailing newline) carry no id.
                continue
            try:
                page_id = int(line)
            except ValueError as why:
                # A malformed entry must not abort the remaining ids.
                print('%s id: %s' % (why, line))
                continue
            print(page_id)
            try:
                response = urllib.urlopen(PAGE_URL % page_id)
                try:
                    data = response.read()
                finally:
                    # Release the connection now rather than waiting for
                    # garbage collection.
                    response.close()
                # Flatten embedded newlines so each page occupies exactly one
                # line of the output file.
                out.write(data.replace('\n', ' ') + '\n')
            except Exception as why:
                # Best effort: log the failure and continue with the next id.
                print('%s id: %d' % (why, page_id))
